/* 
 * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 *             Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, 
 *             University of Michigan, Ann Arbor
 *
 * Copyright 2014 University of Michigan, Ann Arbor, MI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package MSUmpire.MathPackage;

import jMEF.PVector;

import java.util.Arrays;
import java.util.Random;
import java.util.Vector;

public class KMeans {

    private static final int MAX_ITERATIONS = 30;

    /**
     * Performs a k-means on the point set to compute k clusters (with assigned
     * first centroid).
     *
     * @param points point set
     * @param k number of clusters
     * @return clusters
     */
    public static Vector<PVector>[] run(PVector[] points, int k, PVector[] centroids) {

        int[] repartition = new int[points.length];
        Vector<PVector>[] clusters = new Vector[k];

        int it = 0;
        int[] tmp = new int[points.length];

        do {
            tmp = repartition.clone();
            repartitionStep(points, k, centroids, repartition, clusters);
            centroidStep(points, k, centroids, clusters);
            it++;
        } while (!Arrays.equals(repartition, tmp) && it < MAX_ITERATIONS);

        return clusters;
    }

    /**
     * Performs a k-means on the point set to compute k clusters.
     *
     * @param points point set
     * @param k number of clusters
     * @return clusters
     */
    public static Vector<PVector>[] run(PVector[] points, int k) {

        PVector[] centroids = initialize(points, k);
        int[] repartition = new int[points.length];
        Vector<PVector>[] clusters = new Vector[k];

        int it = 0;
        int[] tmp = new int[points.length];

        do {
            tmp = repartition.clone();
            repartitionStep(points, k, centroids, repartition, clusters);
            centroidStep(points, k, centroids, clusters);
            it++;
        } while (!Arrays.equals(repartition, tmp) && it < MAX_ITERATIONS);

        return clusters;
    }

    /**
     * Initializes the k-means by ramdomly picking points in the set.
     *
     * @param points point set
     * @param k number of clusters
     * @return clusters
     */
    private static PVector[] initialize(PVector[] points, int k) {

        // Initialize the first centroid
        PVector[] centroids = new PVector[k];
        Random rand = new Random();
        centroids[0] = (PVector) points[rand.nextInt(points.length)].clone();

        // Initialize the other centroids
        for (int i = 1; i < k; i++) {
            boolean cond = false;
            PVector tmp;
            do {
                cond = false;
                tmp = points[rand.nextInt(points.length)];
                for (int j = 0; j < i; j++) {
                    if (PVector.equals(tmp, centroids[j])) {
                        cond = true;
                        break;
                    }
                }
            } while (cond);
            centroids[i] = (PVector) tmp.clone();
        }

        // Return
        return centroids;
    }

    /**
     * Processes the repartition step.
     *
     * @param points point set
     * @param k number of clusters
     * @param centroids centroids of the clusters
     * @param repartition repartition array
     * @param clusters clusters
     */
    private static void repartitionStep(PVector[] points, int k, PVector[] centroids, int[] repartition, Vector<PVector>[] clusters) {

        // Initialization of the clusters
        for (int i = 0; i < k; i++) {
            clusters[i] = new Vector<PVector>();
        }

        // Compute repartition
        for (int i = 0; i < points.length; i++) {

            int index = 0;
            double dist = Double.MAX_VALUE;

            for (int j = 0; j < k; j++) {
                double dist_tmp = points[i].Minus(centroids[j]).norm2();
                if (dist_tmp < dist) {
                    dist = dist_tmp;
                    index = j;
                }
            }

            repartition[i] = index;
            clusters[index].add(points[i]);
        }
    }

    /**
     * Processes the centroid step.
     *
     * @param points point set
     * @param k number of clusters
     * @param centroids centroids of the clusters
     * @param clusters clusters
     */
    private static void centroidStep(PVector[] points, int k, PVector[] centroids, Vector<PVector>[] clusters) {
        for (int i = 0; i < k; i++) {
            centroids[i] = new PVector(points[0].dim);
            for (int j = 0; j < clusters[i].size(); j++) {
                centroids[i] = centroids[i].Plus((PVector) clusters[i].get(j));
            }
            centroids[i] = centroids[i].Times(1.0d / clusters[i].size());
        }
    }
}
